/*******************************************************************************
																				
	DESCRIPTION:  	This do file produces binned scatterplots of empirical JFR 
					on predicted JFR for the linear model.
	
*******************************************************************************/

* Start from an empty session.
clear all

* Session settings.
* "pause off" turns the -pause- statement used later in this file into a no-op.
pause off
* NOTE(review): no command visible in this file draws random numbers; the seed
* is presumably kept for consistency with sibling do-files -- confirm.
set seed 2110

* Numeric prefix used in the names of the exported graph files.
global id_code 113

/*******************************************************************************
*	1. Linear model
********************************************************************************/
* File-name components of the input data sets.
local model Full
local year 2006

* Load the data and keep observations with a non-missing prediction
* p_emplAft6M_0M_In.
use "${data}/003_MainWithEnsemblePred_`model'_`year'.dta", clear
keep if !missing(p_emplAft6M_0M_In)

gen DataYear = `year' // DataYear indicates which year the individuals come from

* Merge with the linear predictions, keeping matched observations only.
* nogenerate prevents a stray _merge variable from staying in memory; the
* later merge in this file also suppresses it.
merge 1:1 LopNr_PersonNr InLnr ///
	using "${data}/116_Linear_Predictions_`model'_`year'.dta" ///
	, keep(3) nogenerate

* From here on `model' is the suffix of the predicted-JFR variable and the
* model name shown in graph titles and file names.
local model Linear

* Binned scatter plot of empirical vs. predicted JFR.
foreach months in 6 {

	foreach unempl in 0 /* 6 12 */ {

		* Outcome (empirical JFR) and regressor (predicted JFR) for this
		* horizon (`months') and elapsed duration (`unempl').
		local yvar emplAft`months'M_`unempl'M_In
		local xvar p_emplAft`months'M_`unempl'M_In_`model'

		preserve
		* Keep rows for which the date `unempl'*30 days after startU falls in `year'.
		keep if `year'==year(startU + `unempl'*30)

		* Regress empirical JFR on predicted JFR; store formatted coefficients,
		* standard errors, R-squared and N for the legend.
		reg `yvar' `xvar'
		local b0 : display %4.2f _b[_cons]
		local b1 : display %4.2f _b[`xvar']
		local se0 : display %4.2f _se[_cons]
		local se1 : display %4.2f _se[`xvar']
		local r2 : display %5.3f `e(r2)'
		local n : display %9.0fc `e(N)'

		* Restrict to the estimation sample so that the bins are built on exactly
		* the observations behind the reported coefficients and N (the regression
		* drops rows with a missing outcome or prediction).
		keep if e(sample)

		* Generate 20 bins with equal mass (i.e., quantiles of the prediction).
		xtile bin = `xvar', nq(20)

		* Calculate the average empirical and predicted JFR in each bin.
		collapse (mean) `yvar' `xvar', by(bin)

		* Fitted values at the bin means, using the regression estimated above
		* (the estimates stay in memory after -collapse-).
		predict prediction_line

		* Plot: regression line, 45-degree line, and bin averages.
		twoway ///
			(line prediction_line `xvar', lcolor(orange_red)) (function y=x, lcolor(gs7) lpattern(dash)) ///
			(scatter `yvar' `xvar', mfcolor(ebblue) mlcolor(navy)), ///
			graphregion(color(white)) legend(order(1 "{bf:Regression Line}" - "Intercept = `b0' (`se0')" - "Slope = `b1' (`se1')" /* - "R{superscript:2} = `r2'" */ - "N = `n'") symxsize(*0.5) size(small) cols(1) pos(4) ring(0))			///
			ytitle("Average `months'-Month JFR `unempl' Months into Spell") ///
			xtitle("Predicted `months'-Month JFR `unempl' Months into Spell (`year' Sample, `model' Model)") ///
			ylabel(0(0.2)1, angle(0)) ///
			xscale(titlegap(2)) yscale(titlegap(2)) ///
			name(scatter_`months'M_`unempl'MIn_equal, replace)
		graph export "${output}/${id_code}_Predicted`months'MJFR_onEmpirical_`unempl'M_`model'_EqualSizedBins.pdf", as(pdf) replace

		restore
	}
}

*******************************************************************************
 * Robustness: splitting into subsamples:
*******************************************************************************		

* Merge with sample data.
* assert(2 3) makes the merge fail if any observation in memory has no match
* in the using file; keep(3) then retains the matched observations only.
merge 1:1 LopNr_PersonNr InLnr using "${data}/001_9_FinalMainDataset.dta", ///
	 keepusing(Gender foreign EducLevel L_WageInc_adj L_OtherInc_adj L_FamInc_adj DaysUnemp_5Years DaysOnDI_5Years) assert(2 3) keep(3) nogen

* Total individual income = wage income + other income
* (missing whenever either component is missing).
gen L_TotInc_adj = L_WageInc_adj + L_OtherInc_adj

* Income deciles.
xtile dec_L_TotInc_adj = L_TotInc_adj, nq(10)

* Two-quantile (median-split) indicators, all sharing one value label.
* "replace" keeps this section re-runnable if median_lab is already defined.
label define median_lab 1 "Below Median" 2 "Above Median", replace

foreach v in L_TotInc_adj EducLevel DaysUnemp_5Years DaysOnDI_5Years {
	xtile median_`v' = `v', nq(2)
	label values median_`v' median_lab
}

label variable median_L_TotInc_adj "Individual Income"
label variable median_EducLevel "Education Level"
label variable median_DaysUnemp_5Years "Days on UI in 5 Years Before Spell"
label variable median_DaysOnDI_5Years "Days on DI in 5 Years Before Spell"


*******************************************************************************
 * Robustness: by 160 groups by observables
*******************************************************************************		
pause off

* Settings for this section: suffix of the prediction variable, sample year
* and prediction horizon in months.
local model Linear
local year 2006
local months 6

foreach unempl in 0 /* 6 12 */ {

	* Outcome (empirical JFR) and regressor (predicted JFR).
	local yvar emplAft`months'M_`unempl'M_In
	local xvar p_emplAft`months'M_`unempl'M_In_`model'

	* Work on a copy of the default frame. The frame block returns to the
	* calling frame when it ends, including when a command inside it fails.
	frame copy default bins_`unempl'M, replace
	frame bins_`unempl'M {

		* Keep rows for which the date `unempl'*30 days after startU falls in
		* `year' and for which both outcome and prediction are observed.
		keep if `year'==year(startU + `unempl'*30) & !missing(`yvar', `xvar')

		* Average empirical and predicted JFR in each group of observables;
		* after the collapse InLnr holds the number of non-missing InLnr values
		* per group and serves as the regression weight below.
		collapse (count) InLnr (mean) `yvar' `xvar', by(dec_L_TotInc_adj Gender median_DaysUnemp_5Years median_DaysOnDI_5Years foreign)

		* Regress mean empirical JFR on mean predicted JFR, weighting groups by
		* size; store formatted coefficients, standard errors, R-squared and N.
		* Here e(N) is the number of group-level rows used in the regression.
		reg `yvar' `xvar' [aweight = InLnr]
		local b0 : display %4.2f _b[_cons]
		local b1 : display %4.2f _b[`xvar']
		local se0 : display %4.2f _se[_cons]
		local se1 : display %4.2f _se[`xvar']
		local r2 : display %5.3f `e(r2)'
		local n : display %9.0fc `e(N)'

		* Fitted values of the group-level regression.
		predict prediction_line

		* Plot: regression line, 45-degree line, and group averages.
		twoway ///
			(line prediction_line `xvar', lcolor(orange_red)) (function y=x, lcolor(gs7) lpattern(dash)) ///
			(scatter `yvar' `xvar', mfcolor(ebblue) mlcolor(navy)), ///
			graphregion(color(white)) legend(order(1 "{bf:Regression Line}" - "Intercept = `b0' (`se0')" - "Slope = `b1' (`se1')" /* - "R{superscript:2} = `r2'" */ - "N = `n'") symxsize(*0.5) size(small) cols(1) pos(4) ring(0))			///
			ytitle("Average `months'-Month JFR `unempl' Months into Spell") ///
			xtitle("Predicted `months'-Month JFR `unempl' Months into Spell (`year' Sample, `model' Model)") ///
			ylabel(0(0.2)1, angle(0)) ///
			xscale(titlegap(2)) yscale(titlegap(2)) ///
			name(scatter_`months'M_`unempl'MIn_160groups, replace)

		* NOTE(review): the file name says "144Groups" while the graph name says
		* 160 groups. The file name is left unchanged because other files may
		* reference it -- confirm and align.
		graph export "${output}/${id_code}_Predicted`months'MJFR_onEmpirical_`unempl'M_`model'_144Groups.pdf", as(pdf) replace
		pause
	}
}

